OTU_table.t<-transpose.OTU(OTU_table)
ab<-table(unlist(OTU_table.t))
ab[1:20]
barplot(log(ab), las=1, xlab = "Abundand in matrix",
ylab = "log (frequency)",
ylim = c(0, max(log(ab))+2))
row.names(OTU_table)<-OTU_table$OTU_ID
OTU_table.OTU_ID<-OTU_table$OTU_ID
OTU_table$OTU_ID<-NULL
OTU_table<-OTU_table[,colSums(OTU_table[,c(1:(length(colnames(OTU_table))-1))])!=0]
OTU_table.t<-transpose.OTU(OTU_table)
OTU_table.ns<-OTU_table[rowSums(OTU_table[,c(1:(length(OTU_table)-1))])!=1,]
dim(OTU_table)
dim(OTU_table.ns)
OTU_table<-OTU_table.ns
OTU_table.t<-transpose.OTU(OTU_table)
# Seperate taxonomy
OTU_table$taxonomy<-as.character(OTU_table$taxonomy)
tax.classes <- c("kingdom", "phylum", "class", "order", "family", "genus", "species")
OTU_table.tax <- col.splitup(OTU_table, col="taxonomy", sep="; ",drop=TRUE, names=tax.classes)
OTU_table.tax$taxonomy<-OTU_table$taxonomy# Normal?
colnames(OTU.info2)
colnames(OTU.info2)[colnames(OTU.info2)=="Annual.mean.temperature...C."] <- "AnnualMeanTemperature"
colnames(OTU.info2)[colnames(OTU.info2)=="Mean.diurnal.temperature.range..mean.period.max.min.....C."] <- "MeanDiurnalTemperatureRange"
colnames(OTU.info2)[colnames(OTU.info2)=="Temperature.seasonality..C.of.V."] <- "TemperatureSeasonality"
colnames(OTU.info2)[colnames(OTU.info2)=="Max.temperature.of.warmest.week...C."] <- "MaxTemperature"
colnames(OTU.info2)[colnames(OTU.info2)=="Min.temperature.of.coldest.week...C."] <- "MinTemperature"
colnames(OTU.info2)[colnames(OTU.info2)=="Annual.precipitation..mm."] <- "AnnualPrecipitation"
colnames(OTU.info2)[colnames(OTU.info2)=="Precipitation.of.wettest.week..mm."] <- "MaxPrecipitation"
colnames(OTU.info2)[colnames(OTU.info2)=="Precipitation.of.driest.quarter..mm."] <- "MinPrecipitation"
colnames(OTU.info2)[colnames(OTU.info2)=="Precipitation.of.coldest.quarter..mm."] <- "ColdestPrecipitation"
colnames(OTU.info2)[colnames(OTU.info2)=="Precipitation.seasonality..C.of.V."] <- "PrecipitationSeasonality"
Mydata<-OTU.info2[,c(1:6,8:10,16:17,19,21,23)]
Y<-Mydata[,c(8:length(Mydata))]
covar_cor = cor(Y)
corrplot.mixed(covar_cor, upper = "ellipse", lower = "number")
Mydata_PCA = dudi.pca(df = Y, scannf = FALSE, nf = 2, center = TRUE, scale = TRUE)
Mydata_PCA$co
s.corcircle(Mydata_PCA$co)
Mydata_PCA$eig/sum(Mydata_PCA$eig)
colnames(Mydata)
Mydata$Lat <- scale(Mydata$Lat)
Mydata$Long <- scale(Mydata$Long)
Mydata$AnnualPrecipitation <- scale(Mydata$AnnualPrecipitation)
Mydata$AnnualMeanTemperature <- scale(Mydata$AnnualMeanTemperature)
Mydata$PrecipitationSeasonality <- scale(Mydata$PrecipitationSeasonality)
Mydata$TemperatureSeasonality <- scale(Mydata$TemperatureSeasonality)
Mydata$MeanDiurnalTemperatureRange<- scale(Mydata$MeanDiurnalTemperatureRange)
Mydata$MinTemperature <- scale(Mydata$MinTemperature)
Mydata$MaxTemperature <- scale(Mydata$MaxTemperature)
mod1<-lm(mean.Alpha.div ~ Lat + Long + AnnualPrecipitation + AnnualMeanTemperature + PrecipitationSeasonality + TemperatureSeasonality + MeanDiurnalTemperatureRange+ MinTemperature + MaxTemperature , data = Mydata)
car::qqp(OTU.info2$mean.Alpha.div, "norm")
plot(density(OTU.info2$mean.Alpha.div))
shapiro.test(OTU.info2$mean.Alpha.div)
bptest(mod1)
plot(mod1)
# Is endophyte diversity driven by any climatic data? NO
mod1.mLat<-update(mod1,.~. - Lat)
anova(mod1,mod1.mLat)
mod1.mLong<-update(mod1.mLat,.~. - Long)
anova(mod1.mLat,mod1.mLong)
mod1.mAnnualPrecipitation<-update(mod1.mLong,.~. - AnnualPrecipitation)
anova(mod1.mLong,mod1.mAnnualPrecipitation)
mod1.mAnnualMeanTemperature<-update(mod1.mAnnualPrecipitation,.~. - AnnualMeanTemperature)
anova(mod1.mAnnualPrecipitation,mod1.mAnnualMeanTemperature)
mod1.mPrecipitationSeasonality<-update(mod1.mAnnualMeanTemperature,.~. - PrecipitationSeasonality)
anova(mod1.mAnnualMeanTemperature, mod1.mPrecipitationSeasonality)
mod1.mTemperatureSeasonality<-update(mod1.mPrecipitationSeasonality,.~. - TemperatureSeasonality)
anova(mod1.mPrecipitationSeasonality,mod1.mTemperatureSeasonality)
mod1.mMeanDiurnalTemperatureRange<-update(mod1.mTemperatureSeasonality,.~. - MeanDiurnalTemperatureRange)
anova(mod1.mTemperatureSeasonality, mod1.mMeanDiurnalTemperatureRange)
# Min temperature is a significant predictor!
mod1.mMinTemperature<-update(mod1.mMeanDiurnalTemperatureRange,.~. - MinTemperature)
anova(mod1.mMeanDiurnalTemperatureRange, mod1.mMinTemperature)
# Max temperature is a significant predictor!
mod1.mMaxTemperature<-update(mod1.mMinTemperature,.~. - MaxTemperature)
anova(mod1.mMinTemperature, mod1.mMaxTemperature)
mod2<- lm(Alpha.div~Species,data=OTU.info)
summary(mod2)OTU.info$Species<-plyr::revalue(OTU.info$Species, c("Brassica_napus"="Brassica napus", "Brassica_oleracea"="Brassica oleracea","Cratoneuron"="Cratoneuron filicinum","Grimmia"="Grimmia pilifera","Microdesmis"="Microdesmis caseariifolia","Orchid"="Phalaenopsis","Oryza"="Oryza sativa","Paeonia_rockii"="Paeonia rockii","Paeonia_suffruticosa"="Paeonia suffruticosa","Pinus"="Pinus pinaster","Pinus_flexilis"="Pinus flexilis","Pylaisiella"="Pylaisia polyantha","Rothmannia"="Rothmannia macrophylla","Santiria"="Santiria apiculata","Solanum"="Solanum lycopersicum","Vitis"="Vitis vinifera","Zea"="Zea mays"))
# Alpha to secies
Adiv_species_OTU<-ggplot(OTU.info, aes(y=Alpha.div,x=Species))+
geom_boxplot(aes(color=Species))+
labs(y="Shannon diversity")+
theme_katherine()+ theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")+
ggtitle("Alpha Diversity of OTUs by Species")
ggsave("Adiv_species.OTU_PLOT.pdf",Adiv_species_OTU)
# RDA
smry <- summary(formulaRDA)
df1 <- data.frame(smry$sites[,1:2]) # PC1 and PC2
df2 <- data.frame(smry$biplot[,1:2]) # loadings for PC1 and PC2
rda.plot <- ggplot(df1, aes(x=RDA1, y=RDA2)) +
geom_text(aes(label=rownames(df1)),size=3,position=position_jitter(width=.2,height=.2)) +
geom_point(aes(alpha=0.3)) +
geom_hline(yintercept=0, linetype="dotted") +
geom_vline(xintercept=0, linetype="dotted")
names<-c("Latitude","Longitude","\nMean Annual Temp", "Diurnal Temp Range", "\nTemp Seasonality", "Max Temp\n", "Min Temp", "Annual Precipitation\n", "Max Precipitation", "Precipitation Seasonality", "Min Precipitation", "Coldest Precipitation ")
rownames(df2)
(formulaRDA.PLOT<-rda.plot +
geom_segment(data=df2, aes(x=0, xend=RDA1, y=0, yend=RDA2),
color="red", arrow=arrow(length=unit(0.01,"npc"))) +
geom_text(data=df2,
aes(x=RDA1,y=RDA2,label=names,
hjust=0.5*(1-sign(RDA1)),vjust=0.5*(1-sign(RDA2))),
color="red", size=4)+
coord_cartesian(ylim = c(-0.8, 0.8),xlim = c(-1.3, 1.3)) +theme_katherine()+ theme(legend.position="none"))
ggsave("formulaRDA.OTU_PLOT.pdf",formulaRDA.PLOT)
# NMDS
#Make a matrix with no row or column equal to 0 (do not enclude the env variable (GM COVERAGE))
OTU_table.t2<-merge(OTU_table.t,X, by="SampleID")
OTU_table.t2$SampleID<-NULL
rownames.M<-OTU_table.t2$Species
OTU_table.t2$Species<-NULL
M <- as.matrix(OTU_table.t2)
M[is.na(M)] <- 0
which( colSums(M)==0 )
which( rowSums(M)==0 )
rownames(M) <- rownames.M
dist_M <- vegdist(M, method = "bray", binary = T)
#The metaMDS analysis could have done the distance matrix internally but i would rather control it since i have presence/abscence
meta.nmds <- metaMDS(dist_M, k=2, trymax = 2000)
str(meta.nmds)
stressplot(meta.nmds)
Species<-data.frame(Species=as.factor(rownames.M))
# envfit
envfit <- envfit(meta.nmds, env = Species, perm = 999) #standard envfit
envfit
#data for plotting
##NMDS points
NMDS.data<-Species
NMDS.data$NMDS1<-meta.nmds$points[ ,1]
NMDS.data$NMDS2<-meta.nmds$points[ ,2]
colnames(NMDS.data)[1] <- "Species"
mult <- 1 #multiplier for the arrows and text for envfit below. You can change this and then rerun the plot command.
library(ggplot2)
(OTU.nmds.gg1 <- ggplot(data = NMDS.data, aes(y = NMDS2, x = NMDS1))+
geom_point( aes(color = NMDS.data$Species), size = 1.5,alpha=0.6)+
coord_cartesian(xlim = c(-0.25,0.25),ylim = c(-0.1,0.1)) +
theme_katherine())
ggsave("nmds.OTU_PLOT.pdf",OTU.nmds.gg1)metagenome_predictionsmetagenome_predictions<-metagenome_predictions[,colSums(metagenome_predictions[,])!=0]
metagenome_predictions<-metagenome_predictions[rowSums(metagenome_predictions[,])!=0,]
metagenome_predictions.ns<-metagenome_predictions[,colSums(metagenome_predictions[,])!=1]
dim(metagenome_predictions)[1] 177 3424
dim(metagenome_predictions.ns)[1] 177 3407
metagenome_predictions<-metagenome_predictions.ns
Functions<-colnames(metagenome_predictions)
Samples<-rownames(metagenome_predictions)
metagenome_predictions.info<-data.frame(SampleID=Samples, OTU.Alpha.div=rep(NA, length(Samples)),Species=rep(NA, length(Samples)), Function.Alpha.div=rep(NA, length(Samples)))
which(sapply(OTU_table.t, is.character))named integer(0)
OTU_table.t$SampleID<-NULL
for(i in 1:(length(Samples))){
metagenome_predictions.info$SampleID[i]<-Samples[i]
metagenome_predictions.info$OTU.Alpha.div[i]<-diversity(OTU_table.t[rownames(OTU_table.t)==Samples[i],], index = "shannon", MARGIN = 1, base = exp(1))
metagenome_predictions.info$Function.Alpha.div[i]<-diversity(metagenome_predictions[rownames(metagenome_predictions)==Samples[i],], index = "shannon", MARGIN = 1, base = exp(1))
metagenome_predictions.info$Species[i]<-mapping_file[mapping_file$SampleID==Samples[i],]$Species
}
metagenome_predictions.info$Species<-as.factor(metagenome_predictions.info$Species)
metagenome_predictions.info %>% dplyr::group_by(Species) %>% summarise(mean.OTU.Alpha.div=mean(OTU.Alpha.div),mean.Function.Alpha.div=mean(Function.Alpha.div)) -> metagenome_predictions.info2
metagenome_predictions.info2<-merge(metagenome_predictions.info2, BioDatFull2, by="Species")# Normal?
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Annual.mean.temperature...C."] <- "AnnualMeanTemperature"
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Mean.diurnal.temperature.range..mean.period.max.min.....C."] <- "MeanDiurnalTemperatureRange"
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Temperature.seasonality..C.of.V."] <- "TemperatureSeasonality"
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Max.temperature.of.warmest.week...C."] <- "MaxTemperature"
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Min.temperature.of.coldest.week...C."] <- "MinTemperature"
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Annual.precipitation..mm."] <- "AnnualPrecipitation"
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Precipitation.of.wettest.week..mm."] <- "MaxPrecipitation"
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Precipitation.of.driest.quarter..mm."] <- "MinPrecipitation"
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Precipitation.of.coldest.quarter..mm."] <- "ColdestPrecipitation"
colnames(metagenome_predictions.info2)[colnames(metagenome_predictions.info2)=="Precipitation.seasonality..C.of.V."] <- "PrecipitationSeasonality"
Mydata<-metagenome_predictions.info2[,c(1:7,9:11,17,18,20,22,24)]
colnames(metagenome_predictions.info2)
Mydata$Lat <- scale(Mydata$Lat)
Mydata$Long <- scale(Mydata$Long)
Mydata$AnnualPrecipitation <- scale(Mydata$AnnualPrecipitation)
Mydata$AnnualMeanTemperature <- scale(Mydata$AnnualMeanTemperature)
Mydata$PrecipitationSeasonality <- scale(Mydata$PrecipitationSeasonality)
Mydata$TemperatureSeasonality <- scale(Mydata$TemperatureSeasonality)
Mydata$MeanDiurnalTemperatureRange<- scale(Mydata$MeanDiurnalTemperatureRange)
Mydata$MinTemperature <- scale(Mydata$MinTemperature)
Mydata$MaxTemperature <- scale(Mydata$MaxTemperature)
mod3<-lm(mean.Function.Alpha.div ~ Lat + Long + AnnualPrecipitation + AnnualMeanTemperature + PrecipitationSeasonality + TemperatureSeasonality + MeanDiurnalTemperatureRange+ MinTemperature + MaxTemperature , data = Mydata)
car::qqp(metagenome_predictions.info2$mean.Function.Alpha.div, "norm")
plot(density(metagenome_predictions.info2$mean.Function.Alpha.div))
shapiro.test(metagenome_predictions.info2$mean.Function.Alpha.div)
bptest(mod3)
plot(mod3)
summary(mod3)
# Is endophyte diversity driven by any climatic data? NO
mod3.mLat<-update(mod3,.~. - Lat)
anova(mod3,mod3.mLat)
mod3.mLong<-update(mod3.mLat,.~. - Long)
anova(mod3.mLat,mod3.mLong)
# Annual precipitation is significant!
mod3.mAnnualPrecipitation<-update(mod3.mLong,.~. - AnnualPrecipitation)
anova(mod3.mLong,mod3.mAnnualPrecipitation)
mod3.mAnnualMeanTemperature<-update(mod3.mLong,.~. - AnnualMeanTemperature)
anova(mod3.mLong,mod1.mAnnualMeanTemperature)
mod3.mPrecipitationSeasonality<-update(mod3.mAnnualMeanTemperature,.~. - PrecipitationSeasonality)
anova(mod3.mAnnualMeanTemperature, mod3.mPrecipitationSeasonality)
# Temperature seasonality is significant!
mod3.mTemperatureSeasonality<-update(mod3.mPrecipitationSeasonality,.~. - TemperatureSeasonality)
anova(mod3.mPrecipitationSeasonality,mod3.mTemperatureSeasonality)
mod3.mMeanDiurnalTemperatureRange<-update(mod3.mTemperatureSeasonality,.~. - MeanDiurnalTemperatureRange)
anova(mod3.mTemperatureSeasonality, mod3.mMeanDiurnalTemperatureRange)
# Min temperature is a SLIGHTLY significant predictor!
mod3.mMinTemperature<-update(mod3.mMeanDiurnalTemperatureRange,.~. - MinTemperature)
anova(mod3.mMeanDiurnalTemperatureRange, mod3.mMinTemperature)
mod3.mMaxTemperature<-update(mod3.mMinTemperature,.~. - MaxTemperature)
anova(mod3.mMinTemperature, mod3.mMaxTemperature)
mod4<- lm(Function.Alpha.div~Species,data=metagenome_predictions.info)
summary(mod4)X<-metagenome_predictions.info[,c(1,3)]
metagenome_predictions2<-metagenome_predictions
metagenome_predictions2$SampleID<-row.names(metagenome_predictions)
metagenome_predictions2<-merge(metagenome_predictions2,X, by="SampleID")
metagenome_predictions2$Species<-as.factor(metagenome_predictions2$Species)
metagenome_predictions2<-aggregate(metagenome_predictions2, by = list(species=metagenome_predictions2$Species), FUN=mean)
metagenome_predictions2$SampleID<-NULL
row.names(metagenome_predictions2)<-metagenome_predictions2$species
metagenome_predictions2$Species<-NULL
metagenome_predictions2$species<-NULL
sum(is.na(metagenome_predictions2))
metagenome_predictions2matched<-metagenome_predictions2[match(rownames(dist_matrix), rownames(metagenome_predictions2)), ]
beta_divMat<- vegdist(metagenome_predictions2matched, "bray",diag=T)
# Are the endophytes more similar in species that are more closely related phylogenically? NO!
mantel(beta_divMat,dist_matrix)
# are similarity in the endophyte composition driven by climate data?
Mydata$Species<-plyr::revalue(Mydata$Species, c("Brassica_napus"="Brassica napus", "Brassica_oleracea"="Brassica oleracea","Cratoneuron"="Cratoneuron filicinum","Grimmia"="Grimmia pilifera","Microdesmis"="Microdesmis caseariifolia","Orchid"="Phalaenopsis","Oryza"="Oryza sativa","Paeonia_rockii"="Paeonia rockii","Paeonia_suffruticosa"="Paeonia suffruticosa","Pinus"="Pinus pinaster","Pinus_flexilis"="Pinus flexilis","Pylaisiella"="Pylaisia polyantha","Rothmannia"="Rothmannia macrophylla","Santiria"="Santiria apiculata","Solanum"="Solanum lycopersicum","Vitis"="Vitis vinifera","Zea"="Zea mays"))
rownames(Mydata)<-Mydata$Species
metagenome_predictions2$species<-rownames(metagenome_predictions2)
metagenome_predictions2$species<-as.factor(metagenome_predictions2$species)
metagenome_predictions2$species<-plyr::revalue(metagenome_predictions2$species, c("Brassica_napus"="Brassica napus", "Brassica_oleracea"="Brassica oleracea","Cratoneuron"="Cratoneuron filicinum","Grimmia"="Grimmia pilifera","Microdesmis"="Microdesmis caseariifolia","Orchid"="Phalaenopsis","Oryza"="Oryza sativa","Paeonia_rockii"="Paeonia rockii","Paeonia_suffruticosa"="Paeonia suffruticosa","Pinus"="Pinus pinaster","Pinus_flexilis"="Pinus flexilis","Pylaisiella"="Pylaisia polyantha","Rothmannia"="Rothmannia macrophylla","Santiria"="Santiria apiculata","Solanum"="Solanum lycopersicum","Vitis"="Vitis vinifera","Zea"="Zea mays"))
rownames(metagenome_predictions2)<-metagenome_predictions2$species
metagenome_predictions2$species<-NULL
metagenome_predictions2.hell <- decostand(metagenome_predictions2, "hell")
Mydata<-Mydata[match(rownames(metagenome_predictions2), rownames(Mydata)), ]
var <-Mydata[,c(4:15)]
formulaRDA<- rda(metagenome_predictions2.hell ~ Lat + PrecipitationSeasonality + TemperatureSeasonality + MinTemperature + MaxTemperature+ MaxPrecipitation + ColdestPrecipitation, data=var, scale=F)
head(summary(formulaRDA))
RsquareAdj(formulaRDA)
anova(formulaRDA, permutations=1000)
anovaMARGIN<-anova.cca(formulaRDA, by="margin", permutations=1000)
varpart(metagenome_predictions2.hell, ~Lat + Long, ~ AnnualMeanTemperature + MeanDiurnalTemperatureRange + TemperatureSeasonality + MaxTemperature + MinTemperature, ~ AnnualPrecipitation + PrecipitationSeasonality + MaxPrecipitation+ MinPrecipitation + ColdestPrecipitation, data=var)ggsave("nmds.FUN_PLOTFAR.pdf",FUNFAR.nmds.gg1)Saving 7 x 7 in image